**********************************************************************
** title:	The rural-urban cleavage in US presidential elections	**
** data:	GSS files												**
** purpose:	code datasets  											**
** authors:	pautonnier et al.										**
**********************************************************************

/*

** TABLE OF CONTENTS

** 1. Preamble 
** 2. Code GSS data 
** 3. Save final dataset 

*/

* -------------
* 1. preamble
* -------------

	** start from an empty session and raise the variable limit before loading data
	clear all
	set maxvar 10000

	** set working directory (the dataset paths used below are relative to it)
	cd "~/Dropbox/JELS_Rural_Urban_USA"

	** load the raw GSS file, replacing whatever is in memory
	use "Datasets/Stata/GSS_stata/gss7222_r4.dta", clear
		
* ---------------------
* 2. code the data
* ---------------------

	* vote choice: votedem = 1 for the Democratic candidate, 0 for the Republican
	* candidate, missing otherwise. Each line reads the recall item for one
	* election (code 1 = Democrat, code 2 = Republican) in the listed survey years.
	gen votedem = .
	replace votedem = (pres72 == 1) if inlist(pres72, 1, 2) & inlist(year, 1973, 1974, 1975, 1976)	// mcgovern vs. nixon
	replace votedem = (pres76 == 1) if inlist(pres76, 1, 2) & inlist(year, 1977, 1978, 1980)		// carter vs. ford
	replace votedem = (pres80 == 1) if inlist(pres80, 1, 2) & inlist(year, 1982, 1983, 1984)		// carter vs. reagan
	replace votedem = (pres84 == 1) if inlist(pres84, 1, 2) & inlist(year, 1985, 1986, 1987, 1988)	// mondale vs. reagan
	replace votedem = (pres88 == 1) if inlist(pres88, 1, 2) & inlist(year, 1989, 1990, 1991)		// dukakis vs. bush
	replace votedem = (pres92 == 1) if inlist(pres92, 1, 2) & inlist(year, 1993, 1994, 1996)		// clinton vs. bush
	replace votedem = (pres96 == 1) if inlist(pres96, 1, 2) & inlist(year, 1998, 2000)			// clinton vs. dole
	replace votedem = (pres00 == 1) if inlist(pres00, 1, 2) & inlist(year, 2002, 2004)			// gore vs. bush
	replace votedem = (pres04 == 1) if inlist(pres04, 1, 2) & inlist(year, 2006, 2008)			// kerry vs. bush
	replace votedem = (pres08 == 1) if inlist(pres08, 1, 2) & inlist(year, 2010, 2012)			// obama vs. mccain
	replace votedem = (pres12 == 1) if inlist(pres12, 1, 2) & inlist(year, 2014, 2016)			// obama vs. romney
	replace votedem = (pres16 == 1) if inlist(pres16, 1, 2) & inlist(year, 2018, 2021)			// clinton vs. trump
	* NOTE(review): the 2020 item is applied without a survey-year filter, so it
	* overrides the 2016 answer for any respondent who reports a 2020 two-party
	* vote (e.g. in the 2021 wave) -- confirm this is intended.
	replace votedem = (pres20 == 1) if inlist(pres20, 1, 2)							// biden vs. trump
	
	* vote choice (republican vs. democratic)
	* voterep is the exact mirror image of votedem: 1 for the Republican
	* candidate, 0 for the Democratic candidate, over the same recall items and
	* survey years. It is missing wherever votedem is missing.
	gen voterep = 1 - votedem
	
	* election year variable (the election asked about, not the survey year)
	* For every respondent with a two-party vote in votedem, eyear is the most
	* recent election for which a two-party recall answer (code 1 or 2) is on
	* file; the elections are processed in ascending order so later ones win.
	gen eyear = .
	foreach election of numlist 1972(4)2020 {
		local yy = substr("`election'", 3, 2)	// two-digit suffix of the pres* item, e.g. "72", "00"
		replace eyear = `election' if inlist(pres`yy', 1, 2) & inlist(votedem, 0, 1)
	}
	
	* rurality cfr. Kelly and Lobao, based on NORC segments (xnorcsiz)
	* rural runs from 1 (central cities) to 5 (towns, villages, open country)
	gen rural = .
	replace rural = 1 if inlist(xnorcsiz, 1, 2)		// large or medium size central city
	replace rural = 2 if inlist(xnorcsiz, 3, 4)		// suburb of a large or medium size central city
	replace rural = 3 if inlist(xnorcsiz, 5, 6)		// unincorporated
	replace rural = 4 if xnorcsiz == 7			// not within an smsa
	replace rural = 5 if inlist(xnorcsiz, 8, 9, 10)	// town or village, incorporated area less than 2500, open country

	* dichotomy: 1 = rural (categories 4-5), 0 = categories 1-3, missing if rural is missing
	gen ruraldich = (rural >= 4) if !missing(rural)
	
	* education (copy of degree, keeping its labels)
	clonevar education=degree
	
	* children: 1 = at least one child, 0 = no children, missing if childs is missing.
	* Stata treats missing values as larger than any number, so an unguarded
	* "childs > 0" would code respondents with unknown childs as having children.
	gen children_recoded = (childs > 0) if !missing(childs)
	
	* work status (condensed from wrkstat)
	* The mapping follows the GSS WRKSTAT codes: 1 full time, 2 part time,
	* 3 with a job but temporarily not at work, 4 unemployed/laid off, 5 retired,
	* 6 in school, 7 keeping house, 8 other -- verify against the codebook of
	* the release in use.
	* Code 3 therefore counts as employed and code 4 as unemployed. The earlier
	* mapping labelled code 3 "unemployed" and merged the unemployed with the retired.
	gen work_status_cond = .
	label variable work_status_cond "Employment status (condensed)"
	replace work_status_cond = 1 if inlist(wrkstat, 1, 2, 3)           // In employment (incl. temporarily not at work)
	replace work_status_cond = 2 if wrkstat == 4                        // Unemployed
	replace work_status_cond = 3 if wrkstat == 5                        // Retired
	replace work_status_cond = 4 if wrkstat == 7                        // Homemaker
	replace work_status_cond = 5 if wrkstat == 6                        // Student
	replace work_status_cond = 6 if wrkstat == 8                        // Other

	* wrkstat has no separate disability category, so category 3 is labelled "Retired"
	label define work_status_cond_lbl 1 "Employed" 2 "Unemployed" 3 "Retired" ///
                                 4 "Homemaker" 5 "Student" 6 "Other"
	label values work_status_cond work_status_cond_lbl
	
	* religion (relig: 1 = protestant, 2 = catholic, 3-13 = all other answers)
	clonevar religion = relig

	* dummies: 1 for the named group, 0 for every other valid answer, missing otherwise
	gen protestant = (relig == 1) if relig == 1 | inrange(relig, 2, 13)
	gen catholic   = (relig == 2) if inlist(relig, 1, 2) | inrange(relig, 3, 13)

	* three groups: 1 protestant, 2 catholic, 3 everything else
	gen religion_recoded = cond(religion == 1, 1, cond(religion == 2, 2, 3)) ///
		if inlist(religion, 1, 2) | inrange(religion, 3, 13)
	
	* race: 1 if race code 1 (white), 0 if race code 2 or 3, missing otherwise
	gen white = (race == 1) if inlist(race, 1, 2, 3)
	
	* marital status: 1 = currently married (marital == 1), 0 = any other
	* reported status. Respondents with missing marital stay missing rather than
	* being counted as unmarried, consistent with the white and female dummies.
	gen married = (marital == 1) if !missing(marital)
   
   * migration background
	* 1 = respondent born abroad (born == 2), or born in the country (born == 1)
	*     with a parent coded 2 on paborn or maborn;
	* 0 = born in the country with no parent coded 2 (this includes cases where
	*     the parental items are missing, as before);
	* . = born is missing. Previously these cases were silently coded 0.
	gen migration_background = (born == 2) | (born == 1 & (paborn == 2 | maborn == 2)) if !missing(born)
   
	* sex: 1 = female (sex == 2), 0 = male (sex == 1), missing otherwise
	gen female = (sex == 2) if inlist(sex, 1, 2)
	
	* census region: collapse the nine region codes into four regions
	gen census = .
	replace census = 1 if inlist(region, 1, 2)		// Northeast
	replace census = 2 if inlist(region, 3, 4)		// Midwest
	replace census = 3 if inlist(region, 5, 6, 7)	// South
	replace census = 4 if inlist(region, 8, 9)		// West
	
	* labor union member in the household
	* 1 if union is coded 1-3, 0 if coded 4-9, missing otherwise
	gen laborunion = inrange(union, 1, 3) if inrange(union, 1, 3) | inrange(union, 4, 9)
	
	* family income
	/* Note on coding: five groups defined as for ANES.
	   1: 0 to 16th percentile; 2: 17 to 33; 3: 34 to 67; 4: 68 to 95; 5: 96 to 100
	   (sometimes 90 to 100 for the fifth category). Missing for 1981.
	   Each income* item has its own bracket scheme, so the cut-points differ by
	   item; a respondent is classified from whichever item holds a valid code.
	   In every item the fifth group is the top bracket only, except income72,
	   where it is codes 11-12. */

	gen family_income=1 if inrange(income72,1,2)| inrange(income77,1,4)| inrange(income82,1,5)| inrange(income86,1,7) | ///
	inrange(income91,1,8) | inrange(income98,1,9) | inrange(income06,1,9)| inrange(income16,1,11)
	replace family_income=2 if income72==3| inrange(income77,5,8)| inrange(income82,6,9)| inrange(income86,8,10) | ///
	inrange(income91,9,12)  | inrange(income98,10,14) | inrange(income06,10,16)| inrange(income16,12,16)
	replace family_income=3 if inrange(income72,4,6)|inrange(income77,9,12)|inrange(income82,10,14) | inrange(income86,11,16) | ///
	inrange(income91,13,17)  | inrange(income98,15,18) | inrange(income06,17,20)| inrange(income16,17,20)
	replace family_income=4 if inrange(income72,7,10)|inrange(income77,13,15)|inrange(income82,15,16) | inrange(income86,17,19) | ///
	inrange(income91,18,20) | inrange(income98,19,22) | inrange(income06,21,24)| inrange(income16,21,25)
	* income16: the top bracket is code 26 (per the GSS codebook -- verify for the
	* release in use). Code 25 belongs to group 4 above; testing income16==25 here
	* moved it into group 5 and left code 26 unclassified.
	replace family_income=5 if inrange(income72,11,12)|income77==16| income82==17 | income86==20 | income91==21 |income98==23| ///
	income06==25 | income16==26
	
	* recode areas GSS: five-category area type built from xnorcsiz
	gen area_type = .
	replace area_type = 1 if xnorcsiz == 1				// Large cities
	replace area_type = 2 if inlist(xnorcsiz, 3, 4)		// suburban areas
	replace area_type = 3 if xnorcsiz == 2				// small cities
	replace area_type = 4 if inlist(xnorcsiz, 5, 6, 8, 9)	// rural towns
	replace area_type = 5 if inlist(xnorcsiz, 7, 10)		// isolated rural areas

	label define area_type_lbl 1 "Large cities" 2 "Suburban areas" 3 "Small cities" 4 "Rural towns" 5 "Isolated rural areas"
	label values area_type area_type_lbl

	* simplified three-category version for long-term analysis
	gen area_type_simple = .
	replace area_type_simple = 1 if inlist(xnorcsiz, 1, 2)		// Large & medium cities
	replace area_type_simple = 2 if inlist(xnorcsiz, 3, 4, 5, 6)	// Suburbs & unincorporated areas
	replace area_type_simple = 3 if inlist(xnorcsiz, 7, 8, 9, 10)	// Rural areas

	label define area_simple_lbl 1 "Urban" 2 "Suburban" 3 "Rural"
	label values area_type_simple area_simple_lbl


* ----------------------------------------------
* 3. save the fully coded dataset
* ----------------------------------------------

	* dataset identifier variable: the same string for every observation
	gen dataset = "GSS"

	* write the coded data to disk, overwriting any earlier copy
	save "Datasets/Stata/GSS_stata/GSS-coded.dta", replace
	
	
	
	
	
	
	
	
	
	
	
	
